In [20]:
import fastcluster
import numpy as np
import numpy.random as npr
In [2]:
from sklearn.datasets import make_blobs
In [116]:
# Draw 1000 labelled sample points with make_blobs. The fixed seed makes the
# sample (and every plot / linkage derived from it) reproducible on re-run.
X, y = make_blobs(1000, random_state=0)
In [117]:
import pylab as pl
%matplotlib inline
In [118]:
# Scatter the first two columns of X, coloured by the label array y.
pl.scatter(X[:,0],X[:,1],c=y,linewidths=0)
Out[118]:
In [119]:
# Time fastcluster.linkage on X. An assignment made inside %timeit does not
# persist in the notebook namespace, so `linkage` is computed separately below.
%timeit linkage = fastcluster.linkage(X)
In [120]:
# Compute the hierarchical-clustering linkage of X with fastcluster's defaults.
linkage = fastcluster.linkage(X)
In [121]:
# Inspect the dimensions of the linkage matrix.
linkage.shape
Out[121]:
In [122]:
# Inspect the first row of the linkage matrix.
linkage[0]
Out[122]:
In [43]:
# List fastcluster's public names. This stands in for the unfinished
# `fastcluster.` tab-completion, which is a SyntaxError when the cell is run.
[name for name in dir(fastcluster) if not name.startswith('_')]
In [123]:
# Plot column 2 of the linkage for every row (the column used as `height` below).
pl.plot(linkage[:,2])
Out[123]:
In [124]:
# Same column, restricted to the last ten rows of the linkage.
pl.plot(linkage[-10:,2])
Out[124]:
In [125]:
from scipy.cluster import hierarchy
In [126]:
# Draw the dendrogram described by the linkage matrix using SciPy.
hierarchy.dendrogram(linkage)
In [127]:
# Plot column 3 of the linkage for every row.
# NOTE(review): in SciPy's linkage-matrix layout this column is the number of
# observations in each newly formed cluster — confirm fastcluster matches.
pl.plot(linkage[:,3])
Out[127]:
In [128]:
# First two entries of the first linkage row (the columns extracted as `merge` below).
linkage[0][:2]
Out[128]:
In [21]:
# Integer copy of the first two linkage columns (the pair of ids joined at each row).
merge = linkage[:, :2].astype(int)
In [23]:
# Column 2 of the linkage, one value per row.
height = linkage[:,2]
In [33]:
# Show the signature/docstring of fastcluster.expand_dims. This replaces the
# unfinished call `fastcluster.expand_dims(`, a SyntaxError when the cell is run.
help(fastcluster.expand_dims)
In [32]:
def tpe2d(d):
    """Unfinished port (from R, judging by the inline R snippet) of a 2-D embedding routine.

    The function clusters ``d`` with ``fastcluster.linkage`` and walks the
    merge steps, but the port stops part-way: nothing is returned yet.

    Parameters
    ----------
    d : array-like
        Passed straight to ``fastcluster.linkage``.
        NOTE(review): presumably a distance structure, as in the R original —
        confirm whether observations or condensed distances are intended.

    Returns
    -------
    None
        The routine is incomplete and currently produces no result.

    Notes
    -----
    Fixed here: the original called the module itself (``fastcluster(d)``),
    which raises ``TypeError``, and then read the notebook-global ``linkage``
    instead of the linkage computed from ``d``.

    NOTE(review): the sign tests below follow R ``hclust``'s merge convention
    (negative entry = singleton). SciPy-style linkage matrices use
    non-negative ids, so neither branch fires as written — the convention
    still has to be ported (TODO).
    """
    hc = fastcluster.linkage(d)
    merge = np.array(hc[:, :2], dtype=int)
    height = hc[:, 2]
    n = len(height)  # n = len(d) - 1
    ind = np.zeros((n, 2))
    x = np.zeros((n, 2, 2))
    for i in range(n):
        dmin = height[i]
        pair = merge[i]
        if max(pair) < 0:
            # Both entries negative: record the negated pair for this step.
            ind[i] = -pair
            # R original: x[[i]] <- rbind(c(0,0),c(dmin,0))
            x[i] = np.zeros((2, 2))
            x[i, 1, 0] = dmin
        else:
            if min(pair) < 0:
                # Exactly one negative entry combined with an earlier step's record.
                c1 = -min(pair)
                c2 = ind[max(pair)]
                ind[i] = np.array((c1, c2))
    return
In [25]:
def align2d(d, x1, x2, dmin):
    """Placeholder: not implemented yet; ignores its arguments and returns None."""
    return None
In [26]:
def rot2d(theta):
    """Placeholder: not implemented yet; ignores ``theta`` and returns None."""
    return None
In [27]:
def rigid2d(x1, x2, par):
    """Placeholder: not implemented yet; ignores its arguments and returns None."""
    return None
In [28]:
def sumt(obj, pen, par0):
    """Placeholder: not implemented yet; ignores its arguments and returns None."""
    return None
In [29]:
# Smallest id appearing anywhere in `merge`.
np.min(merge)
Out[29]:
In [30]:
# Largest id appearing anywhere in `merge`.
np.max(merge)
Out[30]:
In [129]:
# Recompute the linkage of X with fastcluster (same call as the earlier cell).
linkage = fastcluster.linkage(X)
In [139]:
# Overwrite `linkage` with SciPy's implementation of the same call.
linkage = hierarchy.linkage(X)
In [ ]:
# Show the signature/docstring of SciPy's linkage. This replaces the unfinished
# call `hierarchy.linkage(`, a SyntaxError when the cell is run.
help(hierarchy.linkage)
In [140]:
# Split the linkage matrix: an integer copy of its first two columns (the pair
# of ids joined at each row) and its third column (one value per row).
merge, height = linkage[:, :2].astype(int), linkage[:, 2]
In [141]:
# Largest id appearing in `merge` after switching to the SciPy linkage.
np.max(merge)
Out[141]:
In [144]:
# Number of rows in the linkage (one entry of `height` per row).
n = len(height)
In [145]:
# One singleton member list per observation. A linkage with n rows describes
# n + 1 observations, and SciPy numbers the cluster formed at row i as
# (n + 1) + i, so S must start with n + 1 entries for lookups by cluster id
# (S[a], S[b]) to stay aligned once merged clusters are appended.
S = [[i] for i in range(n + 1)]
In [146]:
# Number of member lists currently held in S.
len(S)
Out[146]:
In [147]:
# Row counts of `merge` and `height`, shown side by side.
len(merge),len(height)
Out[147]:
In [148]:
# Largest id appearing in `merge` (re-checked before replaying the merges).
np.max(merge)
Out[148]:
In [164]:
# History of id sets, starting with one id per observation. A linkage with n
# rows describes n + 1 observations (ids 0..n), so the initial set needs
# n + 1 entries; range(n) left out the last observation id.
# NOTE(review): presumably I[i] is the set of cluster ids still active before
# merge i — confirm against the replay loop.
I = [np.arange(n + 1)]
In [178]:
from sklearn.manifold import MDS
In [184]:
# MDS on a precomputed dissimilarity matrix. The fit starts from a random
# configuration, so the seed is fixed to make the embedding reproducible.
mds = MDS(verbose=1, dissimilarity='precomputed', random_state=0)
In [181]:
from scipy.spatial import distance
# Square matrix of pairwise distances between the rows of X (pdist's default
# metric), expanded from condensed form. Note the variable reuses the name `pdist`.
pdist = distance.squareform(distance.pdist(X))
In [183]:
# Inspect the dimensions of the square distance matrix.
pdist.shape
Out[183]:
In [194]:
# Fit MDS on the top-left 500x500 block of the distance matrix only, i.e. the
# first 500 points.
X_ = mds.fit_transform(pdist[:500,:500])
In [195]:
# Scatter the first two columns of the MDS embedding, coloured by the labels
# of the same first 500 points.
pl.scatter(X_[:,0],X_[:,1],
c=y[:500],alpha=0.5,linewidths=0)
Out[195]:
In [168]:
# Replay the merges row by row, recording each new cluster's members in S and
# an updated id set in I. The dangling `U =` from the original cell (a
# SyntaxError) has been removed.
# NOTE(review): range(n-1) stops one row short of the n linkage rows — confirm intent.
for i in range(n-1):
    a, b = merge[i]
    d = height[i]
    # Members of the new cluster are the members of its two children.
    S.append(S[a] + S[b])
    # Drop the two merged ids from the previous id set, then append the literal 1.
    # NOTE(review): the appended 1 looks like a placeholder for the id of the
    # newly formed cluster — confirm.
    I.append(np.hstack((I[i][(I[i] != a) & (I[i] != b)], 1)))
In [152]:
# Inspect the most recently appended member list.
S[-1]
Out[152]:
In [153]:
# Concatenate member lists 1, 2 and 3 (sanity check of list concatenation).
(S[1]+S[2])+S[3]
Out[153]:
In [203]:
# Replace X with a 20000 x 10 sample from numpy.random.randn for the timing
# cells that follow.
X = npr.randn(20000,10)
In [204]:
# Benchmark: SciPy's linkage on the random sample.
%timeit linkage = hierarchy.linkage(X)
In [205]:
# Benchmark: fastcluster's linkage on the same input.
%timeit linkage = fastcluster.linkage(X)
In [206]:
# Benchmark: fastcluster's linkage_vector on the same input.
%timeit linkage = fastcluster.linkage_vector(X)
In [207]:
# Replace X, y with 20000 labelled make_blobs points. The fixed seed makes
# this benchmark input reproducible on re-run.
X, y = make_blobs(20000, random_state=0)
In [208]:
# Scatter the first two columns of the larger sample, coloured by label.
pl.scatter(X[:,0],X[:,1],c=y,linewidths=0)
Out[208]:
In [209]:
# Benchmark on the blob data: fastcluster's linkage_vector.
%timeit linkage = fastcluster.linkage_vector(X)
In [210]:
# Benchmark on the blob data: fastcluster's linkage.
%timeit linkage = fastcluster.linkage(X)
In [211]:
# Benchmark on the blob data: SciPy's linkage.
%timeit linkage = hierarchy.linkage(X)
In [ ]: